library(tidyverse)
library(lubridate)
library("robustHD")
library('cowplot')
theme_set(theme_bw())
options(digits=4)

# Figure 2 #
load("dta_single.rdata")

dta_single = dta_single %>%
  filter(year >=2010 & year <= 2019)

all_ori9 = dta_single %>%
  group_by(ori9) %>%
  summarize(
    keep = sum(type=='arrest') > 0 & sum(type=='post') > 0
  ) %>%
  filter(keep)
all_ori9 = unique(all_ori9$ori9)
dta_single = dta_single %>%
  filter(ori9 %in% all_ori9)

d1 = dta_single %>%
  group_by(ori9, crime) %>%
  summarize(
    mean_black_post = sum(black > 0 & type=="post") / sum(type=="post"),
    mean_black_arrest = sum(black > 0 & type=="arrest") / sum(type=="arrest")
  ) %>%
  filter(!is.na(mean_black_post) & !is.na(mean_black_arrest)) %>%
  ungroup() %>%
  group_by(crime) %>%
  summarize(
    mean_post = mean(mean_black_post),
    mean_arrest = mean(mean_black_arrest)
  )

d2 = dta_single %>%
  group_by(ori9, crime_class) %>%
  summarize(
    mean_black_post = sum(black > 0 & type=="post") / sum(type=="post"),
    mean_black_arrest = sum(black > 0 & type=="arrest") / sum(type=="arrest")
  ) %>%
  filter(!is.na(mean_black_post) & !is.na(mean_black_arrest)) %>%
  ungroup() %>%
  group_by(crime_class) %>%
  summarize(
    mean_post = mean(mean_black_post),
    mean_arrest = mean(mean_black_arrest)
  )
colnames(d2)[1] = "crime"

d3 = dta_single %>%
  group_by(ori9) %>%
  summarize(
    mean_black_post = sum(black > 0 & type=="post") / sum(type=="post"),
    mean_black_arrest = sum(black > 0 & type=="arrest") / sum(type=="arrest")
  ) %>%
  filter(!is.na(mean_black_post) & !is.na(mean_black_arrest)) %>%
  ungroup() %>%
  summarize(
    mean_post = mean(mean_black_post),
    mean_arrest = mean(mean_black_arrest)
  )
d3$crime = "all"

dta_plt = bind_rows(d1, d2)
dta_plt = bind_rows(dta_plt, d3)
dta_plt = dta_plt %>%
  gather(pa, value, mean_post:mean_arrest)

colnames(dta_plt)[1] = "type"
dta_plt1 = dta_plt
dta_plt = dta_plt %>%
  mutate(
    type_label = recode_factor(type,
                               "all" = "All Crime",
                               "prop" = "All Property",
                               "viol" = "All Violence",
                               "murder" = "Murder",
                               "rape" = "Rape",
                               "robbery" = "Robbery",
                               "agg_assault" = "Aggr. Assault",
                               "auto_theft" = "Auto Theft",
                               "burglary" = "Burglary",
                               "theft" = "Theft"
    ),
    class_label = ifelse(type %in% c("murder", "rape", "robbery", "agg_assault", "viol"), "Violence", "All Crime"),
    pa_label = ifelse(pa=="mean_arrest", "Arrests", "Posts")
  ) %>%
  mutate(
    class_label = replace(class_label, type %in% c("auto_theft", "burglary", "theft", "prop"), "Property")
  )

dta_plt$type_label = factor(dta_plt$type_label, rev(c("All Crime", "All Violence", "Murder", "Rape", "Robbery", "Aggr. Assault", "All Property", "Burglary", "Theft", "Auto Theft")))
dta_plt$class_label = factor(dta_plt$class_label, rev(c("Property", "Violence", "All Crime")))
zp2 = dta_plt %>%
  ggplot(aes(col = class_label))
zp2 = zp2 + geom_point(aes(x = type_label, y = value, shape=pa_label), size=3)
zp2 = zp2 + geom_hline(yintercept = 0, colour = gray(1/2), lty = 2)
zp2 = zp2 + geom_line(aes(x = type_label, y = value),
                      lwd = 1)

zp2 = zp2 + coord_flip() + theme_bw() + ylab("Proportion Black Suspects") + xlab("") 
zp2 = zp2 + ggtitle("") + guides(col=guide_legend(title="Crime Category"))
zp2 = zp2 +theme(
  axis.text.y = element_text(face = c(rep("plain", 3), "bold", rep("plain", 4), "bold", "bold"),
                             colour=c(rep("Darkorange3", 4), rep("Purple", 5), "Darkgreen"),
                             size=c(rep(11, 3), 12, rep(11, 4), 12, 12)
                             
  ))

zp2 = zp2 + scale_color_manual(values=c("Darkgreen", "Purple", "Darkorange3"))
zp2 = zp2 + scale_shape_manual(values=c(1, 16))
zp2 = zp2 + scale_fill_manual(values=c("white", "black"))
zp2 = zp2 +
  guides(fill = guide_legend(override.aes = list(color = NA)), 
         color = FALSE, 
         shape = FALSE)
zp2 = zp2 + guides(shape=guide_legend(title="Type"))
zp2
ggsave(filename = "overreporting.png", dpi=700)


# Figure 3 #
dta_all_ori9 = read_csv("all_exposure_by_ori9.csv")

dta_plt = dta_all_ori9 %>%
  filter(!is.na(black_post) & !is.na(black_arrest)) %>%
  group_by(type) %>%
  summarize(
    mean_post = mean(black_post),
    mean_arrest = mean(black_arrest)
  )  

dta_plt = dta_plt %>%
  gather(pa, value, mean_post:mean_arrest)

dta_plt = dta_plt %>%
  mutate(
    type_label = recode_factor(type,
                               "all" = "All Crime",
                               "prop" = "All Property",
                               "viol" = "All Violence",
                               "murder" = "Murder",
                               "rape" = "Rape",
                               "robbery" = "Robbery",
                               "agg_assault" = "Aggr. Assault",
                               "auto_theft" = "Auto Theft",
                               "burglary" = "Burglary",
                               "theft" = "Theft"
    ),
    class_label = ifelse(type %in% c("murder", "rape", "robbery", "agg_assault", "viol"), "Violence", "All Crime"),
    pa_label = ifelse(pa=="mean_arrest", "Arrests", "Posts")
  ) %>%
  mutate(
    class_label = replace(class_label, type %in% c("auto_theft", "burglary", "theft", "prop"), "Property")
  )

dta_plt$type_label = factor(dta_plt$type_label, rev(c("All Crime", "All Violence", "Murder", "Rape", "Robbery", "Aggr. Assault", "All Property", "Burglary", "Theft", "Auto Theft")))
dta_plt$class_label = factor(dta_plt$class_label, rev(c("Property", "Violence", "All Crime")))

zp2 = dta_plt %>%
  ggplot(aes(col = class_label))
zp2 = zp2 + geom_point(aes(x = type_label, y = value, shape=pa_label), size=3)
zp2 = zp2 + geom_hline(yintercept = 0, colour = gray(1/2), lty = 2)
zp2 = zp2 + geom_line(aes(x = type_label, y = value),
                      lwd = 1)

zp2 = zp2 + coord_flip() + theme_bw() + ylab("Proportion Black Suspects") + xlab("") 
zp2 = zp2 + ggtitle("") + guides(col=guide_legend(title="Crime Category"))
zp2 = zp2 +theme(
  axis.text.y = element_text(face = c(rep("plain", 3), "bold", rep("plain", 4), "bold", "bold"),
                             colour=c(rep("Darkorange3", 4), rep("Purple", 5), "Darkgreen"),
                             size=c(rep(11, 3), 12, rep(11, 4), 12, 12)
                             
  ))
zp2 = zp2 + scale_color_manual(values=c("Darkgreen", "Purple", "Darkorange3"))
zp2 = zp2 + scale_shape_manual(values=c(1, 16))
zp2 = zp2 + scale_fill_manual(values=c("white", "black"))
zp2 = zp2 +
  guides(fill = guide_legend(override.aes = list(color = NA)), 
         color = FALSE, 
         shape = FALSE)
zp2 = zp2 + guides(shape=guide_legend(title="Type"))
zp2
ggsave(filename = "overexposure.png", dpi=700)


# Figure 4 #
# The code for Figure 4 was authored by a research assistant and will be uploaded separately soon.


# Figure 5 #
dta_ori9 = read.csv("overexposure_by_ori9.csv")

dta_fips = dta_ori9 %>%
  group_by(fips) %>%
  summarize(
    lrp = weighted.mean(overexposure, w=(n_arrests), na.rm=T),
    republican_vote = republican_vote[1],
    black_pop = black_pop[1]
  )

dta_ori9$lrp_new = dta_ori9$overexposure

dta_fips$lrp_new = dta_fips$lrp


p1 = dta_fips %>%
  rename(lrp_wins = lrp_new) %>%
  filter(!is.na(lrp) & !is.na(republican_vote)) %>%
  mutate(
    party = ifelse(republican_vote >= 0.5, "Republican", "Democrat"),
  ) %>%
  ggplot(aes(x=republican_vote, y=lrp_wins)) +
  geom_point(aes(col=party), size=1, alpha=0.5, shape=16) +
  geom_smooth(method=loess, col="black", linetype = "dashed") +
  geom_hline(yintercept=0, linetype="dotted", color = "black") +
  ylab("Local Overexposure") +
  xlab("Mean Share of Republican Vote") + 
  scale_color_manual(values = c("#0093c8", "#ff8171")) +
  guides(col=guide_legend(title="Party"))

p1
ggsave(filename = "overexposure_by_party.png", dpi=700,
       width=5483, height=4746, units = "px")

p2 = dta_fips %>%
  mutate(lrp_wins = winsorize(lrp_new, const=5),
         party = ifelse(republican_vote >= 0.5, "Republican", "Democrat")) %>%
  filter(!is.na(lrp_wins) & !is.na(republican_vote) & !is.na(black_pop)) %>%
  ggplot(aes(x=black_pop/100, y=lrp_wins, col=party)) +
  geom_point(size=1, shape=16, alpha=0.5) +
  geom_smooth(method=loess) +
  geom_hline(yintercept=0, linetype="dotted", color = "black") +
  scale_color_manual(values = c("#0093c8", "#ff8171")) +
  ylab("Local Overexposure") +
  xlab("Share of Black Population") +
  guides(col=guide_legend(title="Party"))
p2
ggsave(filename = "overexposure_by_black_population.png", dpi=700,
       width=5483, height=4746, units = "px")



p1 = p1+theme(axis.title.y=element_blank()) +
  ylim(-0.8, 0.8) + xlim(0, 1)
p2 = p2+theme(axis.title.y=element_blank()) +
  ylim(-0.8, 0.8) + xlim(0, 1)
prow = plot_grid(p1 + theme(legend.position="none"), 
                  p2 + theme(legend.position="none"),
                  align = 'vh',
                  hjust = -1,
                  nrow = 2)
legend = get_legend(p1 + theme(legend.direction = "horizontal",legend.justification=c(0.5, 1) ,legend.box.just = "bottom"))

p = plot_grid( prow, legend,  ncol = 1, rel_heights = c(1, 0.2)) +
  draw_label("Local Overexposure", x=0, y=0.6, angle=90, vjust= -1) +
  theme(plot.margin = margin(30, 30, -30, 30))
p

save_plot(plot=p, filename = "Figure_5_combined.pdf", dpi=400,
          base_width=5483, base_height=9600, units = "px")




# Figure S1 #
results_all = read_csv('all_results_alternative_weights.csv')

results_plt = results_all %>%
  mutate(
    eupper1 = (estimate+1*cse),
    elower1 = (estimate-1*cse),
    eupper196 = (estimate+1.96*cse),
    elower196 = (estimate-1.96*cse),
    agency_label = recode_factor(agency,
                                 "0" = "No Agency",
                                 "1" = "Agency"
    ),
    type_label = recode_factor(type,
                               "all" = "All Crime",
                               "prop" = "All Property",
                               "viol" = "All Violence",
                               "murder" = "Murder",
                               "rape" = "Rape",
                               "robbery" = "Robbery",
                               "agg_assault" = "Aggr. Assault",
                               "auto_theft" = "Auto Theft",
                               "burglary" = "Burglary",
                               "theft" = "Theft"
    ),
    weights_label = recode_factor(weights,
                                  "none" = "None",
                                  "sc" = "Followers",
                                  "int" = "Interactions",
                                  "shares" = "Shares"),
    class_label = ifelse(type %in% c("murder", "rape", "robbery", "agg_assault", "viol"), "Violence", "All Crime")
  ) %>%
  mutate(
    class_label = replace(class_label, type %in% c("auto_theft", "burglary", "theft", "prop"), "Property")
  )

results_plt$type_label = factor(results_plt$type_label, rev(c("All Crime", "All Violence", "Murder", "Rape", "Robbery", "Aggr. Assault", "All Property", "Burglary", "Theft", "Auto Theft")))
results_plt$class_label = factor(results_plt$class_label, rev(c("Property", "Violence", "All Crime")))
results_plt$weights_label = factor(results_plt$weights_label, rev(c("Shares", "Interactions", "Followers", "None")))

zp2 = results_plt %>%
  filter(agency == 1) %>%
  ggplot(aes(col = class_label))
zp2 = zp2 + geom_hline(yintercept = 0, colour = gray(1/2), lty = 2)
zp2 = zp2 + geom_linerange(aes(x = type_label, ymin = elower1,
                               ymax = eupper1),
                           lwd = 1, position = position_dodge(width = 1/2))
zp2 = zp2 + geom_pointrange(aes(x = type_label, y = estimate,
                                ymin = elower196, ymax = eupper196),
                            lwd = 1/2, position = position_dodge(width = 1/2),
                            shape = 21, fill = "WHITE", fatten=3)
zp2 = zp2 + coord_flip() + theme_bw() + ylab("Average Overexposure") + xlab("") + facet_wrap(~weights_label)
zp2 = zp2 + ggtitle("") + guides(col=guide_legend(title="Crime Category"))
zp2 = zp2 +theme(
  axis.text.y = element_text(face = c(rep("plain", 3), "bold", rep("plain", 4), "bold", "bold"),
                             colour=c(rep("Darkorange3", 4), rep("Purple", 5), "Darkgreen"),
                             size=c(rep(11, 3), 12, rep(11, 4), 12, 12)
                             
  ))
zp2 = zp2 + scale_color_manual(values=c("Darkgreen", "Purple", "Darkorange3")) +
  guides(fill = guide_legend(override.aes = list(color = NA)), 
         color = FALSE, 
         shape = FALSE)
zp2
ggsave(filename = "regressions_alternative_weights.png", dpi=700)


# Figure S2 #
dta_decay = data.frame(km = NA, miles = NA, w = NA, dec = NA)
exp_weight = function(x, decay=0.00005, threshold = 483){
  y = exp(-decay*(x^2))
  y[x > threshold] = 0
  return(y)
}


for(d in c(0.00005, 0.0001, 0.005)){
  km = seq(1, 484, by=1)
  miles = km*0.621371
  w = exp_weight(km, decay=d)
  dec = rep(d, length(km))
  new_df = data.frame(km = km, miles = miles, w = w, dec = dec)
  dta_decay = bind_rows(dta_decay, new_df)
}
dta_decay = dta_decay[-1,]
dta_decay$dec = as.factor(dta_decay$dec)

dta_decay %>%
  ggplot(aes(x=miles, y=w, col=dec)) +
  geom_line() +
  guides(col=guide_legend(title="Decay")) +
  scale_color_manual(values = c("Darkgreen", "Purple", "Darkorange3")) +
  xlab("Distance (Miles)") + ylab("Weight")
ggsave(filename = "distance_decay.png", dpi=700)


# Figure S3 #
results1 = read_csv('all_results_fixed_weights_distance_00005.csv')
results1$decay = "Decay = 0.00005"
results2 = read_csv('all_results_fixed_weights_distance_0001.csv')
results2$decay = "Decay = 0.0001"
results3 = read_csv('all_results_fixed_weights_distance_0005.csv')
results3$decay = "Decay = 0.0005"
results1
results1[results1$type=='murder' & results1$agency==0,]$estimate
results2[results2$type=='murder' & results2$agency==0,]$estimate
results3[results3$type=='murder' & results3$agency==0,]$estimate

results_all = bind_rows(results1, results2, results3)

results_plt = results_all %>%
  mutate(
    eupper1 = (estimate+1*cse),
    elower1 = (estimate-1*cse),
    eupper196 = (estimate+1.96*cse),
    elower196 = (estimate-1.96*cse),
    agency_label = recode_factor(agency,
                                 "0" = "No Agency",
                                 "1" = "Agency"
    ),
    type_label = recode_factor(type,
                               "all" = "All Crime",
                               "prop" = "All Property",
                               "viol" = "All Violence",
                               "murder" = "Murder",
                               "rape" = "Rape",
                               "robbery" = "Robbery",
                               "agg_assault" = "Aggr. Assault",
                               "auto_theft" = "Auto Theft",
                               "burglary" = "Burglary",
                               "theft" = "Theft"
    ),
    weights_label = recode_factor(weights,
                                  "d" = "Distance"),
    class_label = ifelse(type %in% c("murder", "rape", "robbery", "agg_assault", "viol"), "Violence", "All Crime")
  ) %>%
  mutate(
    class_label = replace(class_label, type %in% c("auto_theft", "burglary", "theft", "prop"), "Property")
  )
results_plt$type_label = factor(results_plt$type_label, rev(c("All Crime", "All Violence", "Murder", "Rape", "Robbery", "Aggr. Assault", "All Property", "Burglary", "Theft", "Auto Theft")))
results_plt$class_label = factor(results_plt$class_label, rev(c("Property", "Violence", "All Crime")))


zp2 = results_plt %>%
  filter(agency == 1) %>%
  ggplot(aes(col = class_label))
zp2 = zp2 + geom_hline(yintercept = 0, colour = gray(1/2), lty = 2)
zp2 = zp2 + geom_linerange(aes(x = type_label, ymin = elower1,
                               ymax = eupper1),
                           lwd = 1, position = position_dodge(width = 1/2))
zp2 = zp2 + geom_pointrange(aes(x = type_label, y = estimate,
                                ymin = elower196, ymax = eupper196),
                            lwd = 1/2, position = position_dodge(width = 1/2),
                            shape = 21, fill = "WHITE", fatten=3)
zp2 = zp2 + coord_flip() + theme_bw() + ylab("Average Overexposure") + xlab("") + facet_wrap(~decay)
zp2 = zp2 + ggtitle("") + guides(col=guide_legend(title="Crime Category"))
zp2 = zp2 +theme(
  axis.text.y = element_text(face = c(rep("plain", 3), "bold", rep("plain", 4), "bold", "bold"),
                             colour=c(rep("Darkorange3", 4), rep("Purple", 5), "Darkgreen"),
                             size=c(rep(11, 3), 12, rep(11, 4), 12, 12)
                             
  ))
zp2 = zp2 + scale_color_manual(values=c("Darkgreen", "Purple", "Darkorange3")) +
  guides(fill = guide_legend(override.aes = list(color = NA)), 
         color = FALSE, 
         shape = FALSE)
zp2
ggsave(filename = "overexposure_by_decay.png", dpi=700)


# Figure S4 #
results_all = read_csv('all_results_pop_weights_distance_00005.csv')

results_plt = results_all %>%
  mutate(
    eupper1 = (estimate+1*cse),
    elower1 = (estimate-1*cse),
    eupper196 = (estimate+1.96*cse),
    elower196 = (estimate-1.96*cse),
    agency_label = recode_factor(agency,
                                 "0" = "No Agency",
                                 "1" = "Agency"
    ),
    type_label = recode_factor(type,
                               "all" = "All Crime",
                               "prop" = "All Property",
                               "viol" = "All Violence",
                               "murder" = "Murder",
                               "rape" = "Rape",
                               "robbery" = "Robbery",
                               "agg_assault" = "Aggr. Assault",
                               "auto_theft" = "Auto Theft",
                               "burglary" = "Burglary",
                               "theft" = "Theft"
    ),
    weights_label = recode_factor(weights,
                                  "d" = "Distance"),
    class_label = ifelse(type %in% c("murder", "rape", "robbery", "agg_assault", "viol"), "Violence", "All Crime")
  ) %>%
  mutate(
    class_label = replace(class_label, type %in% c("auto_theft", "burglary", "theft", "prop"), "Property")
  )
results_plt$type_label = factor(results_plt$type_label, rev(c("All Crime", "All Violence", "Murder", "Rape", "Robbery", "Aggr. Assault", "All Property", "Burglary", "Theft", "Auto Theft")))
results_plt$class_label = factor(results_plt$class_label, rev(c("Property", "Violence", "All Crime")))
results_plt$decay = "Decay = 0.00005"

zp2 = results_plt %>%
  filter(agency == 1) %>%
  ggplot(aes(col = class_label))
zp2 = zp2 + geom_hline(yintercept = 0, colour = gray(1/2), lty = 2)
zp2 = zp2 + geom_linerange(aes(x = type_label, ymin = elower1,
                               ymax = eupper1),
                           lwd = 1, position = position_dodge(width = 1/2))
zp2 = zp2 + geom_pointrange(aes(x = type_label, y = estimate,
                                ymin = elower196, ymax = eupper196),
                            lwd = 1/2, position = position_dodge(width = 1/2),
                            shape = 21, fill = "WHITE", fatten=3)
zp2 = zp2 + coord_flip() + theme_bw() + ylab("Average Overexposure") + xlab("")# + facet_wrap(~decay)
zp2 = zp2 + ggtitle("") + guides(col=guide_legend(title="Crime Category"))
zp2 = zp2 +theme(
  axis.text.y = element_text(face = c(rep("plain", 3), "bold", rep("plain", 4), "bold", "bold"),
                             colour=c(rep("Darkorange3", 4), rep("Purple", 5), "Darkgreen"),
                             size=c(rep(11, 3), 12, rep(11, 4), 12, 12)
                             
  ))
zp2 = zp2 + scale_color_manual(values=c("Darkgreen", "Purple", "Darkorange3")) +
  guides(fill = guide_legend(override.aes = list(color = NA)), 
         color = FALSE, 
         shape = FALSE)
zp2 = zp2+facet_wrap(~decay)
zp2
ggsave(filename = "overreporting_by_decay_pop.png", dpi=700)

# Figure S5
results_all = read_csv('all_results_fixed_weights_distance_00005_nibrs.csv')

results_plt = results_all %>%
  mutate(
    eupper1 = (estimate+1*cse),
    elower1 = (estimate-1*cse),
    eupper196 = (estimate+1.96*cse),
    elower196 = (estimate-1.96*cse),
    agency_label = recode_factor(agency,
                                 "0" = "No Agency",
                                 "1" = "Agency"
    ),
    type_label = recode_factor(type,
                               "all" = "All Crime",
                               "prop" = "All Property",
                               "viol" = "All Violence",
                               "murder" = "Murder",
                               "rape" = "Rape",
                               "robbery" = "Robbery",
                               "agg_assault" = "Aggr. Assault",
                               "auto_theft" = "Auto Theft",
                               "burglary" = "Burglary",
                               "theft" = "Theft"
    ),
    weights_label = recode_factor(weights,
                                  "d" = "Distance"),
    class_label = ifelse(type %in% c("murder", "rape", "robbery", "agg_assault", "viol"), "Violence", "All Crime")
  ) %>%
  mutate(
    class_label = replace(class_label, type %in% c("auto_theft", "burglary", "theft", "prop"), "Property")
  )
results_plt$type_label = factor(results_plt$type_label, rev(c("All Crime", "All Violence", "Murder", "Rape", "Robbery", "Aggr. Assault", "All Property", "Burglary", "Theft", "Auto Theft")))
results_plt$class_label = factor(results_plt$class_label, rev(c("Property", "Violence", "All Crime")))



zp2 = results_plt %>%
  filter(agency == 1) %>%
  ggplot(aes(col = class_label))
zp2 = zp2 + geom_hline(yintercept = 0, colour = gray(1/2), lty = 2)
zp2 = zp2 + geom_linerange(aes(x = type_label, ymin = elower1,
                                ymax = eupper1),
                            lwd = 1, position = position_dodge(width = 1/2))
zp2 = zp2 + geom_pointrange(aes(x = type_label, y = estimate,
                                 ymin = elower196, ymax = eupper196),
                             lwd = 1/2, position = position_dodge(width = 1/2),
                             shape = 21, fill = "WHITE", fatten=3)
zp2 = zp2 + coord_flip() + theme_bw() + ylab("Average Overexposure") + xlab("")
zp2 = zp2 + ggtitle("") + guides(col=guide_legend(title="Crime Category"))
zp2 = zp2 + theme(text=element_text(size=12))
zp2 = zp2 +theme(
  axis.text.y = element_text(face = c(rep("plain", 3), "bold", rep("plain", 4), "bold", "bold"),
                             colour=c(rep("Darkorange3", 4), rep("Purple", 5), "Darkgreen"),
                             size=c(rep(12, 3), 13, rep(12, 4), 13, 13)
                             
  ))
zp2 = zp2 + scale_color_manual(values=c("Darkgreen", "Purple", "Darkorange3")) +
  guides(fill = guide_legend(override.aes = list(color = NA)), 
         color = FALSE, 
         shape = FALSE)
zp2
ggsave(filename = "regressions_distance_decay_nibrs.png", dpi=700,
       width=5483, height=4251, units = "px")

# Figure S6
results1 = read_csv('all_results_fixed_weights_distance_00005_arrest.csv')
results1$decay = "Decay = 0.00005"
results2 = read_csv('all_results_fixed_weights_distance_0001_arrest.csv')
results2$decay = "Decay = 0.0001"
results3 = read_csv('all_results_fixed_weights_distance_0005_arrest.csv')
results3$decay = "Decay = 0.0005"
results1[results1$type=='murder' & results1$agency==0,]$estimate
results2[results2$type=='murder' & results2$agency==0,]$estimate
results3[results3$type=='murder' & results3$agency==0,]$estimate

results_all = bind_rows(results1, results2, results3)

results_plt = results_all %>%
  mutate(
    eupper1 = (estimate+1*cse),
    elower1 = (estimate-1*cse),
    eupper196 = (estimate+1.96*cse),
    elower196 = (estimate-1.96*cse),
    agency_label = recode_factor(agency,
                                 "0" = "No Agency",
                                 "1" = "Agency"
    ),
    type_label = recode_factor(type,
                               "all" = "All Crime",
                               "prop" = "All Property",
                               "viol" = "All Violence",
                               "murder" = "Murder",
                               "rape" = "Rape",
                               "robbery" = "Robbery",
                               "agg_assault" = "Aggr. Assault",
                               "auto_theft" = "Auto Theft",
                               "burglary" = "Burglary",
                               "theft" = "Theft"
    ),
    weights_label = recode_factor(weights,
                                  "d" = "Distance"),
    class_label = ifelse(type %in% c("murder", "rape", "robbery", "agg_assault", "viol"), "Violence", "All Crime")
  ) %>%
  mutate(
    class_label = replace(class_label, type %in% c("auto_theft", "burglary", "theft", "prop"), "Property")
  )
results_plt$type_label = factor(results_plt$type_label, rev(c("All Crime", "All Violence", "Murder", "Rape", "Robbery", "Aggr. Assault", "All Property", "Burglary", "Theft", "Auto Theft")))
results_plt$class_label = factor(results_plt$class_label, rev(c("Property", "Violence", "All Crime")))


zp2 = results_plt %>%
  filter(agency == 1) %>%
  ggplot(aes(col = class_label))
zp2 = zp2 + geom_hline(yintercept = 0, colour = gray(1/2), lty = 2)
zp2 = zp2 + geom_linerange(aes(x = type_label, ymin = elower1,
                                ymax = eupper1),
                            lwd = 1, position = position_dodge(width = 1/2))
zp2 = zp2 + geom_pointrange(aes(x = type_label, y = estimate,
                                 ymin = elower196, ymax = eupper196),
                             lwd = 1/2, position = position_dodge(width = 1/2),
                             shape = 21, fill = "WHITE", fatten=3)
zp2 = zp2 + coord_flip() + theme_bw() + ylab("Average Overexposure") + xlab("") + facet_wrap(~decay)
zp2 = zp2 + ggtitle("") + guides(col=guide_legend(title="Crime Category"))
zp2 = zp2 +theme(
  axis.text.y = element_text(face = c(rep("plain", 3), "bold", rep("plain", 4), "bold", "bold"),
                             colour=c(rep("Darkorange3", 4), rep("Purple", 5), "Darkgreen"),
                             size=c(rep(11, 3), 12, rep(11, 4), 12, 12)
                             
  ))
zp2 = zp2 + scale_color_manual(values=c("Darkgreen", "Purple", "Darkorange3")) +
  guides(fill = guide_legend(override.aes = list(color = NA)), 
         color = FALSE, 
         shape = FALSE)
zp2
ggsave(filename = "overexposure_by_decay_arrest.png", dpi=700)


# Figure S8 # - This Figure comes from Ben Grunwald, so the coding is slightly different

library(dplyr)
library(ggplot2)

df = read.csv("overexposure_by_ori9.csv")

#Histogram of distribution of overreporting

median = median(df$overexposure, na.rm=TRUE)

per.2 = quantile(df$overexposure, .2, na.rm=TRUE)

per.8 = quantile(df$overexposure, .8, na.rm=TRUE)

ggplot(aes(overexposure), data=df)+
  geom_histogram(bins=30, fill="slategray2", color="black")+
  theme_bw()+
  theme(text=element_text(size=20))+
  geom_vline(xintercept = median, colour="navy", size=1.5, linetype="longdash")+
  geom_vline(xintercept = per.2, colour="indianred2", size=1.4)+
  geom_vline(xintercept = per.8, colour="indianred2", size=1.4)+
  xlab("Agency Overexposure Score")+
  ylab("Number of Agencies")
ggsave("overexposure_histogram.png", width=8, height = 4.8, units = "in")


# Figure S9
load("dta_single.rdata")

dta_single = dta_single %>%
  filter(year >=2010 & year <= 2019)
all_ori9 = dta_single %>%
  group_by(ori9) %>%
  summarize(
    keep = sum(type=='arrest') > 0 & sum(type=='post') > 0
  ) %>%
  filter(keep)
all_ori9 = unique(all_ori9$ori9)

dta_single = dta_single %>%
  filter(ori9 %in% all_ori9)

dta_or = dta_single %>%
  group_by(ori9) %>%
  summarize(
    mean_black_post = sum(black > 0 & type=="post") / sum(type=="post"),
    mean_black_arrest = sum(black > 0 & type=="arrest") / sum(type=="arrest")
  ) %>%
  mutate(
    lor = mean_black_post - mean_black_arrest
  )

dta_ori9 = read.csv("overexposure_by_ori9.csv") %>%
  filter(!is.na(overexposure)) %>%
  mutate(
    so = grepl("sheriff", agency_name)
  ) %>%
  left_join(dta_or[,c("ori9", "lor")])

dta_fips = dta_ori9 %>%
  group_by(fips) %>%
  summarize(
    lor = weighted.mean(lor, w=(n_arrests), na.rm=T),
    republican_vote = republican_vote[1],
    black_pop = black_pop[1]
  )

dta_fips %>%
  mutate(
    party = ifelse(republican_vote >= 0.5, "Republican", "Democrat"),
  ) %>%
  group_by(party, !is.na(lor)) %>%
  summarize(
    non_report_rate = mean(is.na(lor)),
    n = n()
  )

dta_or %>%
  left_join(dta_ori9[,c("ori9" , "republican_vote")]) %>%
  filter(!is.na(republican_vote)) %>%
  mutate(
    party = ifelse(republican_vote >= 0.5, "Republican", "Democrat"),
  ) %>%
  group_by(party) %>%
  summarize(
    mean_lor = mean(mean_black_post - mean_black_arrest, na.rm=T)
  )

dta_ori9$lor_new = dta_ori9$lor

dta_fips$lor_new = dta_fips$lor

dta_fips %>%
  filter(!is.na(lor) & !is.na(republican_vote)) %>%
  mutate(lor_wins = lor_new) %>%
  mutate(
    party = ifelse(republican_vote >= 0.5, "Republican", "Democrat"),
  ) %>%
  ggplot(aes(x=republican_vote, y=lor_wins)) +
  geom_point(aes(col=party), size=1, alpha=0.5, shape=16) +
  geom_smooth(method=loess, col="black", linetype = "dashed") +
  geom_hline(yintercept=0, linetype="dotted", color = "black") +
  ylab("Local Overreporting") +
  xlab("Mean Share of Republican Vote") + 
  scale_color_manual(values = c("#0093c8", "#ff8171")) +
  guides(col=guide_legend(title="Party"))+
  theme(text=element_text(size=12))
ggsave(filename = "overreporting_by_party.png", dpi=700,
       width=5483, height=4746, units = "px")


dta_fips %>%
  filter(!is.na(lor) & !is.na(republican_vote)) %>%
  mutate(lor_wins = lor_new) %>%
  mutate(party = ifelse(republican_vote >= 0.5, "Republican", "Democrat")) %>%
  filter(!is.na(lor_wins) & !is.na(republican_vote) & !is.na(black_pop)) %>%
  ggplot(aes(x=black_pop/100, y=lor_wins)) +
  geom_point(aes(col=party), size=1, shape=16, alpha=0.5) +
  geom_smooth(method="loess", aes(col=party)) +
  geom_hline(yintercept=0, linetype="dotted", color = "black") +
  scale_color_manual(values = c("#0093c8", "#ff8171")) +
  ylab("Local Overreporting") +
  xlab("Share of Black Population") +
  guides(col=guide_legend(title="Party"))+
  theme(text=element_text(size=12))
ggsave(filename = "overreporting_by_black_pop.png", dpi=700,
       width=5483, height=4746, units = "px")

# Figure S10 #

dta_ori9 = read.csv("overexposure_by_ori9.csv") %>%
  filter(!is.na(overexposure)) |> 
mutate(
  so = grepl("sheriff", agency_name)
) |> 
  left_join(dta_or[,c("ori9", "lor")])


dta_fips = dta_ori9 %>%
  group_by(fips) %>%
  summarize(
    lrp = weighted.mean(overexposure, w=(n_arrests), na.rm=T),
    republican_vote = republican_vote[1],
    black_pop = black_pop[1]
  )

load("dta_single.rdata")
dta_single = dta_single %>%
  filter(year >=2010 & year <= 2019)
all_ori9 = dta_single %>%
  group_by(ori9) %>%
  summarize(
    keep = sum(type=='arrest') > 0 & sum(type=='post') > 0
  ) %>%
  filter(keep)
all_ori9 = unique(all_ori9$ori9)
dta_single = dta_single %>%
  filter(ori9 %in% all_ori9)

d3 = dta_single %>%
  group_by(ori9) %>%
  summarize(
    mean_black_post = sum(black > 0 & type=="post") / sum(type=="post"),
    mean_black_arrest = sum(black > 0 & type=="arrest") / sum(type=="arrest")
  ) %>%
  mutate(
    lor = mean_black_post - mean_black_arrest
  )



 
dta_ori9 %>%
  ggplot(aes(x=black_officers, y=lor)) +
  geom_point(size=1, alpha=0.3, shape=16) +
  geom_smooth(method=loess, col="blue", linetype = "dashed") +
  geom_hline(yintercept=0, linetype="dotted", color = "black") +
  ylab("Local Overreporting") +
  xlab("Share of Black Officers") +
  theme(text=element_text(size=12))
ggsave(filename = "representation_or_officers.png", dpi=700,
       width=5483, height=4746, units = "px") 


# Figure S11 #

dta_fips_so = dta_ori9 %>%
  group_by(fips, so) %>%
  summarize(
    lor = weighted.mean(lor, w=(n_arrests), na.rm=T),
    republican_vote = republican_vote[1],
    black_pop = black_pop[1]
  )
dta_fips_so$lor_new = dta_fips_so$lor

dta_fips_so %>%
  mutate(lor_wins = lor_new) %>%
  filter(!is.na(lor) & !is.na(republican_vote)) %>%
  mutate(
    party = ifelse(republican_vote >= 0.5, "Republican", "Democrat"),
    so = recode_factor(as.factor(so), 
                       "FALSE" = "Police Department", 
                       "TRUE" = "Sheriff's Office")
  ) %>%
  ggplot(aes(x=republican_vote, y=lor_wins)) +
  geom_point(aes(col=party), size=1, alpha=0.5, shape=16) +
  geom_smooth(method=loess, col="black", aes(lty=so)) +
  geom_hline(yintercept=0, linetype="dotted", color = "black") +
  ylab("Local Overreporting") +
  xlab("Mean Share of Republican Vote") + 
  scale_color_manual(values = c("#0093c8", "#ff8171")) +
  scale_linetype_manual(values = c(2, 3)) +
  guides(col=guide_legend(title="Party"),
         linetype=guide_legend(title="Agency Type"))+
  theme(text=element_text(size=12))

ggsave("overreporting_by_party_sheriffs.png", dpi=700,
       width=5483, height=4746, units = "px")


# Figure S12 #
load("dta_single.rdata")

dta_single = dta_single %>%
  filter(year >=2010 & year <= 2019)

all_ori9 = dta_single %>%
  group_by(ori9) %>%
  summarize(
    keep = sum(type=='arrest') > 0 & sum(type=='post') > 0
  ) %>%
  filter(keep)
all_ori9 = unique(all_ori9$ori9)
dta_single = dta_single %>%
  filter(ori9 %in% all_ori9)

dta_single %>%
  filter(type=="post") %>%
  summarize(
    mean_viol = mean(crime_class=="viol"),
    mean_prop = mean(crime_class=='prop'),
    mean_black = mean(black > 0)
  )

dta_plt1 = dta_single %>%
  filter(type=="post") %>%
  group_by(year) %>%
  summarize(
    total = n()
  ) %>%
  mutate(
    cs_post = cumsum(total)
  ) %>%
  select(year, cs_post)

dta_plt2 = dta_single %>%
  filter(type=="arrest") %>%
  group_by(year) %>%
  summarize(
    total = n()
  ) %>%
  mutate(
    cs_arrest = cumsum(total)
  ) %>%
  select(year, cs_arrest)

dta_plt3 = data.frame(year=NA, cs_agency=NA)
all_years = unique(dta_plt2$year)

for(i in c(1:length(all_years))){
  the_year = all_years[i]
  rel_dta = dta_single %>%
    filter(year <= the_year & type=="post")
  dta_plt3[i,] = c(the_year, length(unique(rel_dta$ori9)))
}
rm(rel_dta)
dta_plt = left_join(dta_plt1, dta_plt2)
dta_plt = left_join(dta_plt, dta_plt3)
dta_plt = gather(dta_plt, type, n, cs_post:cs_agency)
dta_plt = dta_plt %>%
  mutate(type_label = recode_factor(type,
                                    "cs_post" = "Posts",
                                    "cs_arrest" = "Arrests",
                                    "cs_agency" = "Agencies"
  )
  )

dta_plt = dta_plt %>%
  filter(type_label != "Arrests") %>%
  ggplot(aes(x=year, y=n, col=type_label)) +
  geom_point() +
  geom_line() + facet_wrap(~type_label, scales="free") +
  ylab("Cumulative Sum") +
  xlab("") +
  scale_color_manual(values = c("Darkgreen", "Purple", "Darkorange3")) +
  guides(fill = guide_legend(override.aes = list(color = NA)), 
         color = FALSE, 
         shape = FALSE)
dta_plt
ggsave(filename = "cumulative_posts_arrests_agencies.png", dpi=700)


# Figure S13 #

results_all = read_csv('all_results_fixed_weights_distance_00005_pop10k.csv')

results_plt = results_all %>%
  mutate(
    eupper1 = (estimate+1*cse),
    elower1 = (estimate-1*cse),
    eupper196 = (estimate+1.96*cse),
    elower196 = (estimate-1.96*cse),
    agency_label = recode_factor(agency,
                                 "0" = "No Agency",
                                 "1" = "Agency"
    ),
    type_label = recode_factor(type,
                               "all" = "All Crime",
                               "prop" = "All Property",
                               "viol" = "All Violence",
                               "murder" = "Murder",
                               "rape" = "Rape",
                               "robbery" = "Robbery",
                               "agg_assault" = "Aggr. Assault",
                               "auto_theft" = "Auto Theft",
                               "burglary" = "Burglary",
                               "theft" = "Theft"
    ),
    weights_label = recode_factor(weights,
                                  "d" = "Distance"),
    class_label = ifelse(type %in% c("murder", "rape", "robbery", "agg_assault", "viol"), "Violence", "All Crime")
  ) %>%
  mutate(
    class_label = replace(class_label, type %in% c("auto_theft", "burglary", "theft", "prop"), "Property")
  )
results_plt$type_label = factor(results_plt$type_label, rev(c("All Crime", "All Violence", "Murder", "Rape", "Robbery", "Aggr. Assault", "All Property", "Burglary", "Theft", "Auto Theft")))
results_plt$class_label = factor(results_plt$class_label, rev(c("Property", "Violence", "All Crime")))



zp2 = results_plt %>%
  filter(agency == 1) %>%
  ggplot(aes(col = class_label))
zp2 = zp2 + geom_hline(yintercept = 0, colour = gray(1/2), lty = 2)
zp2 = zp2 + geom_linerange(aes(x = type_label, ymin = elower1,
                                ymax = eupper1),
                            lwd = 1, position = position_dodge(width = 1/2))
zp2 = zp2 + geom_pointrange(aes(x = type_label, y = estimate,
                                 ymin = elower196, ymax = eupper196),
                             lwd = 1/2, position = position_dodge(width = 1/2),
                             shape = 21, fill = "WHITE", fatten=3)
zp2 = zp2 + coord_flip() + theme_bw() + ylab("Average Overexposure") + xlab("")
zp2 = zp2 + ggtitle("") + guides(col=guide_legend(title="Crime Category"))
zp2 = zp2 + theme(text=element_text(size=12))
zp2 = zp2 +theme(
  axis.text.y = element_text(face = c(rep("plain", 3), "bold", rep("plain", 4), "bold", "bold"),
                             colour=c(rep("Darkorange3", 4), rep("Purple", 5), "Darkgreen"),
                             size=c(rep(12, 3), 13, rep(12, 4), 13, 13)
                             
  ))
zp2 = zp2 + scale_color_manual(values=c("Darkgreen", "Purple", "Darkorange3")) +
  guides(fill = guide_legend(override.aes = list(color = NA)), 
         color = FALSE, 
         shape = FALSE)
zp2
ggsave(filename = "regressions_distance_decay_pop10k.png", dpi=700,
       width=5483, height=4251, units = "px")   


# Figure S14 #
results_all = read_csv('all_results_fixed_weights_distance_00005_allmonths.csv')

results_plt = results_all %>%
  mutate(
    eupper1 = (estimate+1*cse),
    elower1 = (estimate-1*cse),
    eupper196 = (estimate+1.96*cse),
    elower196 = (estimate-1.96*cse),
    agency_label = recode_factor(agency,
                                 "0" = "No Agency",
                                 "1" = "Agency"
    ),
    type_label = recode_factor(type,
                               "all" = "All Crime",
                               "prop" = "All Property",
                               "viol" = "All Violence",
                               "murder" = "Murder",
                               "rape" = "Rape",
                               "robbery" = "Robbery",
                               "agg_assault" = "Aggr. Assault",
                               "auto_theft" = "Auto Theft",
                               "burglary" = "Burglary",
                               "theft" = "Theft"
    ),
    weights_label = recode_factor(weights,
                                  "d" = "Distance"),
    class_label = ifelse(type %in% c("murder", "rape", "robbery", "agg_assault", "viol"), "Violence", "All Crime")
  ) %>%
  mutate(
    class_label = replace(class_label, type %in% c("auto_theft", "burglary", "theft", "prop"), "Property")
  )
results_plt$type_label = factor(results_plt$type_label, rev(c("All Crime", "All Violence", "Murder", "Rape", "Robbery", "Aggr. Assault", "All Property", "Burglary", "Theft", "Auto Theft")))
results_plt$class_label = factor(results_plt$class_label, rev(c("Property", "Violence", "All Crime")))



zp2 = results_plt %>%
  filter(agency == 1) %>%
  ggplot(aes(col = class_label))
zp2 = zp2 + geom_hline(yintercept = 0, colour = gray(1/2), lty = 2)
zp2 = zp2 + geom_linerange(aes(x = type_label, ymin = elower1,
                                ymax = eupper1),
                            lwd = 1, position = position_dodge(width = 1/2))
zp2 = zp2 + geom_pointrange(aes(x = type_label, y = estimate,
                                 ymin = elower196, ymax = eupper196),
                             lwd = 1/2, position = position_dodge(width = 1/2),
                             shape = 21, fill = "WHITE", fatten=3)
zp2 = zp2 + coord_flip() + theme_bw() + ylab("Average Overexposure") + xlab("")
zp2 = zp2 + ggtitle("") + guides(col=guide_legend(title="Crime Category"))
zp2 = zp2 + theme(text=element_text(size=12))
zp2 = zp2 +theme(
  axis.text.y = element_text(face = c(rep("plain", 3), "bold", rep("plain", 4), "bold", "bold"),
                             colour=c(rep("Darkorange3", 4), rep("Purple", 5), "Darkgreen"),
                             size=c(rep(12, 3), 13, rep(12, 4), 13, 13)
                             
  ))

zp2 = zp2 + scale_color_manual(values=c("Darkgreen", "Purple", "Darkorange3")) +
  guides(fill = guide_legend(override.aes = list(color = NA)), 
         color = FALSE, 
         shape = FALSE)
zp2
ggsave(filename = "regressions_distance_decay_allmonths.png", dpi=700,
       width=5483, height=4251, units = "px")   


